{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "40990b60-87f9-4782-8bfc-264256850abc",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f557b74e-b28e-441d-9ce9-65d50c54d4b4",
   "metadata": {},
   "source": [
    "# 数据框的分组与聚合"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "b308b1fd-cbef-4ca1-9abb-0823613abd34",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>工号</th>\n",
       "      <th>部门</th>\n",
       "      <th>年龄</th>\n",
       "      <th>绩效</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>801</td>\n",
       "      <td>B</td>\n",
       "      <td>20</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>802</td>\n",
       "      <td>B</td>\n",
       "      <td>16</td>\n",
       "      <td>45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>803</td>\n",
       "      <td>B</td>\n",
       "      <td>38</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>804</td>\n",
       "      <td>A</td>\n",
       "      <td>16</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>805</td>\n",
       "      <td>B</td>\n",
       "      <td>44</td>\n",
       "      <td>44</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>806</td>\n",
       "      <td>A</td>\n",
       "      <td>38</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>807</td>\n",
       "      <td>A</td>\n",
       "      <td>17</td>\n",
       "      <td>32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>808</td>\n",
       "      <td>A</td>\n",
       "      <td>15</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    工号 部门  年龄  绩效\n",
       "0  801  B  20  17\n",
       "1  802  B  16  45\n",
       "2  803  B  38  13\n",
       "3  804  A  16  25\n",
       "4  805  B  44  44\n",
       "5  806  A  38  20\n",
       "6  807  A  17  32\n",
       "7  808  A  15  42"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.random.seed(99)\n",
    "df = pd.DataFrame({\n",
    "    \"工号\": np.arange(801, 809),\n",
    "    \"部门\": np.random.choice(['A', 'B'], size=8),\n",
    "    \"年龄\": np.random.randint(15, 50, 8),\n",
    "    \"绩效\": np.random.randint(5, 50, 8),\n",
    "})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b237dc4b-5333-42ad-ab8e-d0eb351703bc",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "bc7e046e-efd2-4db7-a4cc-294658161a7e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f6bec160340>"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby([\"部门\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2c2770e9-e631-435d-a4fa-775f203a4ffe",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ff71d65d-cfd5-4a21-8916-b54d182428fa",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "== 部门: ('A',)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>工号</th>\n",
       "      <th>部门</th>\n",
       "      <th>年龄</th>\n",
       "      <th>绩效</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>804</td>\n",
       "      <td>A</td>\n",
       "      <td>16</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>806</td>\n",
       "      <td>A</td>\n",
       "      <td>38</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>807</td>\n",
       "      <td>A</td>\n",
       "      <td>17</td>\n",
       "      <td>32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>808</td>\n",
       "      <td>A</td>\n",
       "      <td>15</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    工号 部门  年龄  绩效\n",
       "3  804  A  16  25\n",
       "5  806  A  38  20\n",
       "6  807  A  17  32\n",
       "7  808  A  15  42"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "== 部门: ('B',)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>工号</th>\n",
       "      <th>部门</th>\n",
       "      <th>年龄</th>\n",
       "      <th>绩效</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>801</td>\n",
       "      <td>B</td>\n",
       "      <td>20</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>802</td>\n",
       "      <td>B</td>\n",
       "      <td>16</td>\n",
       "      <td>45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>803</td>\n",
       "      <td>B</td>\n",
       "      <td>38</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>805</td>\n",
       "      <td>B</td>\n",
       "      <td>44</td>\n",
       "      <td>44</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    工号 部门  年龄  绩效\n",
       "0  801  B  20  17\n",
       "1  802  B  16  45\n",
       "2  803  B  38  13\n",
       "4  805  B  44  44"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "for dept, grp in df.groupby([\"部门\"]):\n",
    "    print(f\"== 部门:\", dept)\n",
    "    display(grp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a8444db4-a9eb-4282-92f4-7243be8e0335",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>工号</th>\n",
       "      <th>年龄</th>\n",
       "      <th>绩效</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>部门</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>806.25</td>\n",
       "      <td>21.5</td>\n",
       "      <td>29.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>802.75</td>\n",
       "      <td>29.5</td>\n",
       "      <td>29.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        工号    年龄     绩效\n",
       "部门                     \n",
       "A   806.25  21.5  29.75\n",
       "B   802.75  29.5  29.75"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby([\"部门\"]).agg('mean')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "465b9e4b-e048-4b96-a807-dadd18c1ab07",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>工号</th>\n",
       "      <th>年龄</th>\n",
       "      <th>绩效</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>部门</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>806.25</td>\n",
       "      <td>21.5</td>\n",
       "      <td>29.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>802.75</td>\n",
       "      <td>29.5</td>\n",
       "      <td>29.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        工号    年龄     绩效\n",
       "部门                     \n",
       "A   806.25  21.5  29.75\n",
       "B   802.75  29.5  29.75"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby([\"部门\"]).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "20541f3f-3b41-4c92-a870-bfde61184e07",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>绩效</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>部门</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>21.5</td>\n",
       "      <td>29.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>29.5</td>\n",
       "      <td>29.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      年龄     绩效\n",
       "部门             \n",
       "A   21.5  29.75\n",
       "B   29.5  29.75"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(\"部门\")[['年龄', '绩效']].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "095f8886-1bb4-4a60-83d5-cee94abcef16",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>部门</th>\n",
       "      <th>年龄</th>\n",
       "      <th>绩效</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>21.5</td>\n",
       "      <td>29.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>29.5</td>\n",
       "      <td>29.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  部门    年龄     绩效\n",
       "0  A  21.5  29.75\n",
       "1  B  29.5  29.75"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(\"部门\", as_index=False)[['年龄', '绩效']].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "8b3de18e-a2ad-4edb-a56d-e50717b27a79",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>部门</th>\n",
       "      <th>年龄</th>\n",
       "      <th>绩效</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>21.5</td>\n",
       "      <td>29.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>29.5</td>\n",
       "      <td>29.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  部门    年龄     绩效\n",
       "0  A  21.5  29.75\n",
       "1  B  29.5  29.75"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(\"部门\")[['年龄', '绩效']].mean().reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "68b63f35-a001-4deb-9f23-6afb3739444d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"3\" halign=\"left\">绩效</th>\n",
       "      <th>年龄</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "      <th>std</th>\n",
       "      <th>median</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>部门</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>29.75</td>\n",
       "      <td>119</td>\n",
       "      <td>9.535023</td>\n",
       "      <td>16.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>29.75</td>\n",
       "      <td>119</td>\n",
       "      <td>17.114808</td>\n",
       "      <td>29.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       绩效                     年龄\n",
       "     mean  sum        std median\n",
       "部门                              \n",
       "A   29.75  119   9.535023   16.5\n",
       "B   29.75  119  17.114808   29.0"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2 = df.groupby('部门').agg({'绩效': ['mean', 'sum', 'std'], '年龄':'median'})\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "ecf58139-c40c-455a-a332-d108bef8f3cc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "      <th>std</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>部门</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>29.75</td>\n",
       "      <td>119</td>\n",
       "      <td>9.535023</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>29.75</td>\n",
       "      <td>119</td>\n",
       "      <td>17.114808</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     mean  sum        std\n",
       "部门                       \n",
       "A   29.75  119   9.535023\n",
       "B   29.75  119  17.114808"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2.绩效"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ae41d499-45ec-4090-800f-03f4b7ad65f1",
   "metadata": {},
   "source": [
    "### 分组transform数据转换"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "662f1be3-2b4b-4c70-8c96-4bae330fcbba",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "部门\n",
       "A    29.75\n",
       "B    29.75\n",
       "Name: 绩效, dtype: float64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('部门').绩效.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "1a829560-b44e-4abc-9348-86d587f66a29",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>工号</th>\n",
       "      <th>部门</th>\n",
       "      <th>年龄</th>\n",
       "      <th>绩效</th>\n",
       "      <th>部门平均绩效</th>\n",
       "      <th>部门内绩效差异</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>801</td>\n",
       "      <td>B</td>\n",
       "      <td>20</td>\n",
       "      <td>17</td>\n",
       "      <td>29.75</td>\n",
       "      <td>-12.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>802</td>\n",
       "      <td>B</td>\n",
       "      <td>16</td>\n",
       "      <td>45</td>\n",
       "      <td>29.75</td>\n",
       "      <td>15.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>803</td>\n",
       "      <td>B</td>\n",
       "      <td>38</td>\n",
       "      <td>13</td>\n",
       "      <td>29.75</td>\n",
       "      <td>-16.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>804</td>\n",
       "      <td>A</td>\n",
       "      <td>16</td>\n",
       "      <td>25</td>\n",
       "      <td>29.75</td>\n",
       "      <td>-4.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>805</td>\n",
       "      <td>B</td>\n",
       "      <td>44</td>\n",
       "      <td>44</td>\n",
       "      <td>29.75</td>\n",
       "      <td>14.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>806</td>\n",
       "      <td>A</td>\n",
       "      <td>38</td>\n",
       "      <td>20</td>\n",
       "      <td>29.75</td>\n",
       "      <td>-9.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>807</td>\n",
       "      <td>A</td>\n",
       "      <td>17</td>\n",
       "      <td>32</td>\n",
       "      <td>29.75</td>\n",
       "      <td>2.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>808</td>\n",
       "      <td>A</td>\n",
       "      <td>15</td>\n",
       "      <td>42</td>\n",
       "      <td>29.75</td>\n",
       "      <td>12.25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    工号 部门  年龄  绩效  部门平均绩效  部门内绩效差异\n",
       "0  801  B  20  17   29.75   -12.75\n",
       "1  802  B  16  45   29.75    15.25\n",
       "2  803  B  38  13   29.75   -16.75\n",
       "3  804  A  16  25   29.75    -4.75\n",
       "4  805  B  44  44   29.75    14.25\n",
       "5  806  A  38  20   29.75    -9.75\n",
       "6  807  A  17  32   29.75     2.25\n",
       "7  808  A  15  42   29.75    12.25"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3 = df.copy()\n",
    "df3['部门平均绩效'] = df.groupby('部门')['绩效'].transform('mean')\n",
    "df3['部门内绩效差异'] = df3.绩效 - df3.部门平均绩效\n",
    "df3"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "31d88462-3b04-4251-b143-388e5bef8422",
   "metadata": {},
   "source": [
    "<br>\n",
    "\n",
    "排名函数或秩函数[`rank()`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rank.html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "c047ba07-2a2f-4d97-ad5a-0cb6d7ec2f34",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    5.0\n",
       "1    3.0\n",
       "2    1.0\n",
       "3    3.0\n",
       "4    3.0\n",
       "5    6.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.Series([30, 25, 10, 25, 25, 40]).rank()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "6a63419a-4b06-4176-b565-81018b79cb75",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    2.0\n",
       "1    4.0\n",
       "2    1.0\n",
       "3    2.0\n",
       "4    3.0\n",
       "5    1.0\n",
       "6    3.0\n",
       "7    4.0\n",
       "Name: 绩效, dtype: float64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('部门').绩效.rank()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "95eeb56b-63e4-466a-8696-76c4f88ce985",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    3.0\n",
       "1    1.0\n",
       "2    4.0\n",
       "3    3.0\n",
       "4    2.0\n",
       "5    4.0\n",
       "6    2.0\n",
       "7    1.0\n",
       "Name: 绩效, dtype: float64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('部门').绩效.rank(ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "43424b3c-3e9d-4b59-a675-60b1ae0c7db5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    3.0\n",
       "1    1.0\n",
       "2    4.0\n",
       "3    3.0\n",
       "4    2.0\n",
       "5    4.0\n",
       "6    2.0\n",
       "7    1.0\n",
       "Name: 绩效, dtype: float64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('部门')['绩效'].transform(\"rank\", ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9e088177-bae7-43d7-ac1c-b9afd5934c60",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "bfb3cfe9-c47a-4486-b844-98924adc6299",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>工号</th>\n",
       "      <th>部门</th>\n",
       "      <th>年龄</th>\n",
       "      <th>绩效</th>\n",
       "      <th>部门平均绩效</th>\n",
       "      <th>部门内绩效差异</th>\n",
       "      <th>部门内绩效排名</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>801</td>\n",
       "      <td>B</td>\n",
       "      <td>20</td>\n",
       "      <td>17</td>\n",
       "      <td>29.75</td>\n",
       "      <td>-12.75</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>802</td>\n",
       "      <td>B</td>\n",
       "      <td>16</td>\n",
       "      <td>45</td>\n",
       "      <td>29.75</td>\n",
       "      <td>15.25</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>803</td>\n",
       "      <td>B</td>\n",
       "      <td>38</td>\n",
       "      <td>13</td>\n",
       "      <td>29.75</td>\n",
       "      <td>-16.75</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>804</td>\n",
       "      <td>A</td>\n",
       "      <td>16</td>\n",
       "      <td>25</td>\n",
       "      <td>29.75</td>\n",
       "      <td>-4.75</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>805</td>\n",
       "      <td>B</td>\n",
       "      <td>44</td>\n",
       "      <td>44</td>\n",
       "      <td>29.75</td>\n",
       "      <td>14.25</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>806</td>\n",
       "      <td>A</td>\n",
       "      <td>38</td>\n",
       "      <td>20</td>\n",
       "      <td>29.75</td>\n",
       "      <td>-9.75</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>807</td>\n",
       "      <td>A</td>\n",
       "      <td>17</td>\n",
       "      <td>32</td>\n",
       "      <td>29.75</td>\n",
       "      <td>2.25</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>808</td>\n",
       "      <td>A</td>\n",
       "      <td>15</td>\n",
       "      <td>42</td>\n",
       "      <td>29.75</td>\n",
       "      <td>12.25</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    工号 部门  年龄  绩效  部门平均绩效  部门内绩效差异  部门内绩效排名\n",
       "0  801  B  20  17   29.75   -12.75      3.0\n",
       "1  802  B  16  45   29.75    15.25      1.0\n",
       "2  803  B  38  13   29.75   -16.75      4.0\n",
       "3  804  A  16  25   29.75    -4.75      3.0\n",
       "4  805  B  44  44   29.75    14.25      2.0\n",
       "5  806  A  38  20   29.75    -9.75      4.0\n",
       "6  807  A  17  32   29.75     2.25      2.0\n",
       "7  808  A  15  42   29.75    12.25      1.0"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3['部门内绩效排名'] = df.groupby('部门').绩效.rank(ascending=False)\n",
    "df3"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
